Obviously density matters for neural networks

We have four questions that we need answered:

  1. How does the model boundary change as:
    1. the distance between the two sides changes
    2. the number of jags changes
    3. the number of hidden-layer neurons changes
  2. What are the activations of each weight in each layer for the entire data space?
  3. How many neurons are activated at the same time for the entire data space?
  4. (optional) How do the activations change through each epoch?

1.a.

# 1.a — vary the distance (margin) between the two sides.
# Narrow margin first.
data, y, cy = create_data(n_samples=5000, p=10, margin=1, sample_prob=0.1)
plt.figure(figsize=(8, 8))
plt.scatter(data[:, 0], data[:, 1], c=cy, s=3)
plt.show()

model = run_model(data, y, epochs=10)
model_boundary(model)

# Same setup with a wide margin for comparison.
data, y, cy = create_data(n_samples=5000, p=10, margin=5, sample_prob=0.1)
plt.figure(figsize=(8, 8))
plt.scatter(data[:, 0], data[:, 1], c=cy, s=3)
plt.show()

model = run_model(data, y, epochs=10)
model_boundary(model)

1.b.

# 1.b — vary p (the number of jags) at a fixed margin.
# Many jags (p=10).
data, y, cy = create_data(n_samples=5000, p=10, margin=2, sample_prob=0.1)
plt.figure(figsize=(8, 8))
plt.scatter(data[:, 0], data[:, 1], c=cy, s=3)
plt.show()

model = run_model(data, y, epochs=10)
model_boundary(model)

# Few jags (p=2) for comparison.
data, y, cy = create_data(n_samples=5000, p=2, margin=2, sample_prob=0.1)
plt.figure(figsize=(8, 8))
plt.scatter(data[:, 0], data[:, 1], c=cy, s=3)
plt.show()

model = run_model(data, y, epochs=10)
model_boundary(model)

1.c

# 1.c — vary the number of hidden-layer neurons on one fixed dataset.
data, y, cy = create_data(n_samples=5000, p=5, margin=5, sample_prob=0.1)
plt.figure(figsize=(8, 8))
plt.scatter(data[:, 0], data[:, 1], c=cy, s=3)
plt.show()

# Small network: two hidden layers of 4 units each.
model = run_model(data, y, epochs=10, layer_sizes=[4, 4])
model_boundary(model)

# Larger network: two hidden layers of 16 units each.
model = run_model(data, y, epochs=10, layer_sizes=[16, 16])
model_boundary(model)

2.

# 2. — dataset and model used for the activation-inspection plots below.
data, y, cy = create_data(n_samples=5000, p=10, margin=5, sample_prob=0.1)
plt.figure(figsize=(8, 8))
plt.scatter(data[:, 0], data[:, 1], c=cy, s=3)
plt.show()

model = run_model(data, y, epochs=10, layer_sizes=[4, 4])
model_boundary(model)

def layer_plot(act, layer_size, lower, upper, title, nrows=1):
    """Show one activation heatmap per hidden unit over the 2-D input grid.

    Parameters
    ----------
    act : torch.Tensor
        Activations shaped (grid, grid, layer_size); channel i is unit i's
        response over the input grid.
    layer_size : int
        Number of units (heatmaps) to draw.
    lower, upper : float
        Extent of the input grid along both axes.
    title : str
        Figure-level title.
    nrows : int, optional
        Number of subplot rows. This parameter was previously accepted but
        ignored (the subplot grid was hard-coded to one row); it is now
        honored. The default of 1 reproduces the original layout exactly.
    """
    ncols = (layer_size + nrows - 1) // nrows  # ceil(layer_size / nrows)
    fig, axes = plt.subplots(nrows, ncols, figsize=(20, 5 * nrows))
    # plt.subplots returns a bare Axes for a 1x1 grid and an ndarray
    # otherwise; normalize to a flat sequence so indexing is uniform.
    axes = axes.ravel() if hasattr(axes, "ravel") else [axes]
    for i in range(layer_size):
        ax = axes[i]
        ax.imshow(act[:, :, i].cpu().detach().numpy(),
                  extent=(lower, upper, lower, upper),
                  origin='lower',
                  cmap='viridis')
        ax.set_title(f'Weight {i+1}')
        ax.set_xlabel('x-axis')
        ax.set_ylabel('y-axis')
    # Blank out any grid cells beyond layer_size (possible when nrows > 1).
    for ax in axes[layer_size:]:
        ax.axis('off')
    fig.suptitle(title)
    plt.tight_layout()
    plt.show()

# apply model to the entire data space
def plot_activations(model, layers=None, layer_size=4, n_samples=100, lower=-10, upper=10):
    """Run the model over a regular grid spanning the input space and draw
    per-unit activation heatmaps for the requested hidden layers.

    Parameters
    ----------
    model : network exposing an ``activations`` dict keyed "h1"/"h2"
        (populated by the forward pass — presumably via hooks set up in
        run_model; confirm against its definition).
    layers : iterable of int, optional
        Which hidden layers to plot (1 and/or 2). Defaults to both.
        Note: was a mutable default ``[1, 2]``; using ``None`` avoids the
        shared-mutable-default pitfall while preserving behavior.
    layer_size : int
        Number of units per hidden layer (must match the trained model).
    n_samples : int
        Approximate number of grid points; the grid is
        round(sqrt(n_samples)) points per side.
    lower, upper : float
        Grid extent along each of the two input dimensions.
    """
    if layers is None:
        layers = [1, 2]
    grid_size = round(np.sqrt(n_samples))
    x = np.linspace(lower, upper, grid_size)
    D = np.array(list(product(x, x)))
    D_tensor = torch.tensor(D, dtype=torch.float32, device=DEVICE)
    model(D_tensor)  # forward pass populates model.activations

    # One heatmap figure per requested layer; the h1/h2 branches were
    # duplicated code, collapsed into a loop.
    for idx in (1, 2):
        if idx in layers:
            act = model.activations[f"h{idx}"].reshape((grid_size, grid_size, layer_size))
            layer_plot(act, layer_size=layer_size, lower=lower, upper=upper, title=f"Layer {idx}")

def plot_active_areas(model, layer_size = 4, n_samples = 100, lower = -10, upper = 10):
    """Heatmap of total activation — summed over every unit of both hidden
    layers — across a regular grid covering the input space."""
    side = round(np.sqrt(n_samples))
    axis_pts = np.linspace(lower, upper, side)
    grid = torch.tensor(np.array(list(product(axis_pts, axis_pts))),
                        dtype=torch.float32, device=DEVICE)
    model(grid)  # forward pass fills model.activations

    per_layer = [
        model.activations[key].reshape((side, side, layer_size))
        for key in ("h1", "h2")
    ]
    # Add the two layers elementwise, then reduce over the unit axis,
    # leaving a (side, side) map of total activation.
    total = (per_layer[0] + per_layer[1]).sum(dim=2)

    plt.figure(figsize=(8, 8))
    plt.imshow(total.cpu().detach().numpy(),
               extent=(lower, upper, lower, upper),
               origin='lower',
               cmap='viridis')
    plt.title("Most active areas")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.grid(True)
    plt.show()
# 2. — per-unit activation maps for the [4, 4] model trained above.
plot_activations(model)

3.

This sums up the activations across weights and layers to show which areas in the input space cause all the weights to be active.

# 3. — summed activation map for the same model.
plot_active_areas(model)

2. and 3. but bigger model

# Questions 2 and 3 again, with a wider network (16 units per hidden layer).
model = run_model(data, y, epochs=10, layer_sizes=[16, 16])
model_boundary(model)

plot_activations(model, layer_size=16)

plot_active_areas(model, layer_size=16)

2. and 3. but with bad decision boundary

# Questions 2 and 3 on a model whose decision boundary fits poorly.
data, y, cy = create_data(n_samples=5000, p=10, margin=5, sample_prob=0.1)
plt.figure(figsize=(8, 8))
plt.scatter(data[:, 0], data[:, 1], c=cy, s=3)
plt.show()

model = run_model(data, y, epochs=10, layer_sizes=[4, 4])
model_boundary(model)

plot_activations(model, layer_size=4)

plot_active_areas(model, layer_size=4)